*===============================================================* * Programmer: James Nguyen, US EPA * * * * Project: Power and effect size inflation in epi. studies * * * * Purpose: * * - applicable to studies where RR is estimated * * - estimates power of the study design for testing: * * + true RR > 1 with a significance level = alpha * * (i.e., the lower bound of (100*(1-alpha) * * confidence interval) > 1 * * + true RR < 1 with a significance level = alpha * * (i.e., the upper bound of (100*(1-alpha) * * confidence interval) < 1 * * - characterizes the distribution of observed * * significant RRs * * * * Assumptions: * * - time of follow-up is different between subjects * * and the distributions of follow-up time are * * different between studies - these typically are not * * well characterized in the publications. * * * * - For simplicity, we assume all subjects have * * same time of follow-up. This assumption typically * * over-estimates the power of the study design. * * * * Notes: the DM statement in the Macro clears all the SAS * * output, SAS log, and the Results. Some WARNING and * * ERROR messages can occur in the log. * * * * - When the parameter EXACT = N (i.e., the Empirical * * Standard Error Estimates is used), WARNING and ERROR * * messages can occur in the SAS log for some datasets * * for which one group has 0 values for all subjects). * * The EXACT Test is used for these datasets. * * * * - All datasets for which both groups have 0 values for * * all subjects are not included in the data analysis. * * * * - This Macro is the revision of the code version * * revised in 10/2019 * * * * Date: 10/2021 * *==============================================================*; option nodate nonumber formDlim="=" ls=100 ps=100 nomprint; %let junk=C:\Users\JNguyen\Desktop\Junks; %Macro ESM_Rate(R0=, /* rates of disease in reference/control group */ RR=, /* true rate ratios or relative risks */ N0=, /* sample size of control group */ N1=, /* sample size of exposure group */ Exact=, /* if "Exact = Y" then Exact test for all datasets */ /* if "Exact = N" then Exact test only for non-convergent datasets */ alpha=, /* level of significance */ pctiles=, /* percentiles of significant ORs: 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5 */ ipctiles=, /* percentiles that need to calculate inflation factors */ NSim=, /* number of iterations, i.e. datasets */ seed=, /* seed number to reproduce random numbers */ outfile=); *===> count number of control rates; %let R0N=1; %let R0&R0N = %nrbquote(%scan(&R0,&R0N, %str( ))); %do %while (&&R0&R0N ^=); %let R0N=%eval(&R0N+1); %let R0&R0N = %nrbquote(%scan(&R0,&R0N, %str( ))); %end; %let R0N=%eval(&R0N-1); *===> count number of RRs; %let RN=1; %let RR&RN = %nrbquote(%scan(&RR,&RN, %str( ))); %do %while (&&RR&RN ^=); %let RN=%eval(&RN+1); %let RR&RN = %nrbquote(%scan(&RR,&RN, %str( ))); %end; %let RN=%eval(&RN-1); *===> count the percentiles that need to calculate the inflation factors; %let ipn=1; %let ip&ipn = %nrbquote(%scan(&ipctiles,&ipn, %str( ))); %do %while (&&ip&ipn ^=); %let ipn=%eval(&ipn+1); %let ip&ipn = %nrbquote(%scan(&ipctiles,&ipn, %str( ))); %end; %let ipn=%eval(&ipn-1); title; *===> create a dataset of control rates and RRs; Data R_RR; set _NULL_; run; %do j = 1 %to &R0N; %do k=1 %to &RN; Data New; *==> rate of disease in reference group; R0=&&R0&j; true_RR=&&RR&k; *==> rate of disease in compared group; R1 = true_RR*R0; N0 = &N0; N1 = &N1; run; Data R_RR; set R_RR New; run; Proc datasets nolist; delete New; quit; %end; *k; %end; *j; Proc sort data = R_RR; by R0 R1 true_RR N0 N1; run; *===> create datasets with Total number of cases; Data Sim; call streaminit(&seed); set R_RR; do group = 1 to 2; Rate = (group=1)*R0 + (group=2)*R1; N = (group=1)*N0 + (group=2)*N1; *==> for the sake of simplicity, follow-up time is set = 1 for everyone; *==> Total Cases; %do Sim = 1 %to &NSim; Total&Sim = rand("Poisson", N*Rate); %end; *sim; output; end; *group; drop Rate; run; *===> create datasets with individual case-subject data; Data Sim; set Sim; do i = 1 to N; %do Sim = 1 %to &NSim; *==> case for the i-th subject at each Sim; Case&Sim = (Total&Sim >= i); %end; *sim; output; end; drop N Total:; run; Data All; set _NULL_; SCOL=.;run; ods listing close; ods select none; *===> use generalzied linear model to estimate RRs and their p-values; %do i = 1 %to &NSim; Proc SQL; create table maxSim&i as select R0, R1, true_RR, N0, N1, max(case&i) as max&i from Sim group by R0, R1, true_RR, N0, N1 order by R0, R1, true_RR, N0, N1; quit; Data Sim&i; merge Sim(keep= R0 R1 true_RR N0 N1 group i case&i) maxSim&i; by R0 R1 true_RR N0 N1; if max&i > 0; drop max&i; run; Proc datasets nolist; delete maxSim&i; quit; Proc SQL noprint; select count(*) into: Nrow from Sim&i; quit; %let Nrow=&Nrow; %if &Nrow ^= 0 %then %do; %if &Exact = Y %then %do; ods output ExpExactParmEst=Exact(drop=CISides Note Parameter ClassVal0 alpha); Proc GENMOD data = Sim&i; by R0 R1 true_RR N0 N1; class group(ref="1")/param=ref ; model case&i = group/dist=poisson link=log alpha=α exact group/estimate=odds alpha=α run; Data All; set All Exact(in=new); if new then Sim = &i; run; Proc datasets nolist; delete Exact Sim&i; quit; %end; *Exact = Y; %if &Exact = N %then %do; Data NPower; set _NULL_; run; ods output estimates= NPower(keep=R0 R1 true_RR N0 N1 MeanEstimate ProbChiSq MeanLowerCL MeanUpperCL); Proc GENMOD data = Sim&i; by R0 R1 true_RR N0 N1; class group i; model case&i = group/dist=poisson link=log; *==> use a sandwich estimator; repeated subject=i(group)/type=unstr; estimate "group: 2 vs. 1" group -1 1/alpha=α run; Proc SQL noprint; select count(*) into: Nrow1 from NPower; quit; %let Nrow1=&Nrow1; %if &Nrow1 = 0 %then %do; ods output ExpExactParmEst=NExact(drop=CISides Note Parameter ClassVal0 alpha); Proc GENMOD data = Sim&i; by R0 R1 true_RR N0 N1; class group(ref="1")/param=ref ; model case&i = group/dist=poisson link=log; exact group/estimate=odds alpha=α run; Data All; set All NExact(in=new); if new then Sim = &i; run; run; Proc datasets nolist; delete NExact; quit; %end;*Nrow1 = 0; %if &Nrow1 ^= 0 %then %do; Data NPower; set NPower; Estimate=MeanEstimate; LowerCL=MeanLowerCL; UpperCL=MeanUpperCL; Pvalue=ProbChiSq; drop MeanEstimate MeanLowerCL MeanUpperCL ProbChiSq; run; Data All; set All NPower(in=new); if new then Sim = &i; run; *==> create datasets that one group has case = 0 for all subjects; Data Sim&i; merge Sim&i NPower(in=s keep = R0 R1 true_RR N0 N1); by R0 R1 true_RR N0 N1; if s = 0; run; Proc SQL noprint; select count(*) into: Nrow2 from Sim&i; quit; %let Nrow2=&Nrow2; %if &Nrow2 ^= 0 %then %do; ods output ExpExactParmEst=NExact(drop=CISides Note Parameter ClassVal0 alpha); Proc GENMOD data = Sim&i; by R0 R1 true_RR N0 N1; class group(ref="1")/param=ref ; model case&i = group/dist=poisson link=log; exact group/estimate=odds alpha=α run; Data All; set All NExact(in=new); if new then Sim = &i; run; run; Proc datasets nolist; delete NExact; quit; %end;*Nrow2; %end; *Nrow1 ^= 0; Proc datasets nolist; delete Sim&i NPower; quit; %end; *Exact = N; %end; *Nrow; DM "log;clear; output;clear; odsresults;clear"; %end; *i; Proc datasets nolist; delete Sim; quit; Proc SQL noprint; select count(*) into: NAll from All; quit; %let NAll=&NAll; %if &NAll = 0 %then %do; ods listing; ods select default; title; %if &outfile ^= %then %do; ods rtf file = "&outfile" startpage=no bodytitle; %end; data _null_; file print; put "All subjects have same outcome value. None of the datasets were valid for analysis"; file log; run; %if &outfile ^= %then %do; ods rtf close; %end; %end; *NAll = 0; %if &NAll ^= 0 %then %do; Data All; set All; if true_RR > 1 and LowerCL > 1 then do; Significance = 1; Rate_Ratio = Estimate; end; else if true_RR < 1 and UpperCL > 1 then do; Significance = 1; Rate_Ratio = Estimate; end; else do; Significance = 0; Rate_Ratio = .; end; run; Proc sort data = All; by R0 R1 true_RR N0 N1; run; Proc SQL noprint; create table Power as select R0, R1, true_RR, N0, N1, count(*) as valid, round(avg(Significance),0.001) as Power from All group by R0, R1, true_RR, N0, N1 order by R0, R1, true_RR, N0, N1; select count(*) into : NPower1 from All where significance = 1; %let NPower1 = &NPower1; quit; %if &NPower1 > 0 %then %do; Proc univariate data =All noprint; where significance = 1; by R0 R1 true_RR N0 N1; var Rate_Ratio; output out = SigRRs pctlpts=&pctiles pctlpre= pctl; run; Data SigRRs; set SigRRs; %do v = 1 %to &ipn; %if &&ip&v = 2.5 %then %do; infl_P025 = pctl2_5/true_RR; %end; %if &&ip&v = 5 %then %do; infl_P05 = pctl5/true_RR; %end; %if &&ip&v = 10 %then %do; infl_P10 = pctl10/true_RR; %end; %if &&ip&v = 25 %then %do; infl_P25 = pctl25/true_RR; %end; %if &&ip&v = 50 %then %do; infl_P50 = pctl50/true_RR; %end; %if &&ip&v = 75 %then %do; infl_P75 = pctl75/true_RR; %end; %if &&ip&v = 90 %then %do; infl_P90 = pctl90/true_RR; %end; %if &&ip&v = 95 %then %do; infl_P95 = pctl95/true_RR; %end; %if &&ip&v = 97.5 %then %do; infl_P975 = pctl97_5/true_RR; %end; %end; run; Data ESM; merge Power SigRRs R_rr; by R0 R1 true_RR N0 N1; if valid = . then valid = 0; run; %end; %if &NPower1 = 0 %then %do; Data ESM; merge Power R_rr; by R0 R1 true_RR N0 N1; if valid = . then valid = 0; run; %end; ods listing; ods select default; %if &outfile ^= %then %do; ods rtf file = "&outfile" startpage=no bodytitle; %end; %if &RR1 > 1 %then %do; title "Power and distribution of observed RRs with lower bound of %sysevalf(100*(1-&alpha))% CI > 1"; title2 %if &Exact = Y %then %do; "Exact Test was used for all datasets"; %end; %if &Exact = N %then %do; "Exact Test was used only for non-convergent datasets"; %end;; %end; %if &&RR&RN < 1 %then %do; title "Power and distribution of observed RRs with upper bound of %sysevalf(100*(1-&alpha))% CI < 1"; title2 %if &Exact = Y %then %do; "Exact Test was used for all datasets"; %end; %if &Exact = N %then %do; "Exact Test was used only for non-convergent datasets"; %end;; %end; %if &RR1 < 1 and &&RR&RN > 1 %then %do; title "Power and distribution of observed RRs with:"; title2 "the upper bound of %sysevalf(100*(1-&alpha))% CI < 1 if true RR < 1"; title3 "or the lower bound of %sysevalf(100*(1-&alpha))% CI > 1 if true RR > 1"; title4 %if &Exact = Y %then %do; "Exact Test was used for all datasets"; %end; %if &Exact = N %then %do; "Exact Test was used only for non-convergent datasets"; %end;; %end; Proc Print data = ESM noobs label; format PCTL: infl: 6.3; label valid = "# valid datasets"; run; %if &outfile ^= %then %do; ods rtf close; %end; %end; *NAll ^= 0; %Mend; %ESM_Rate(R0=0.005618 0.011237 0.022473, RR=1.2 1.5 2.0 3.0, N0=17710, N1=1710, Exact = N, alpha=0.1, pctiles=10 50 90, ipctiles=50, NSim=50, seed=2412544, outfile=C:\Users\JNguyen\Desktop\Junks\example on page 18 of the Working paper 2019.rtf);